Subfusion
对两个输入数组逐元素做减法运算,支持三种模式:带缩放因子的减法、减法后应用 ReLU 激活、减法后应用 ReLU6 激活。
模式1 - 带缩放因子的减法(subext):
\[\text{output}_i = \text{input0}_i - \text{input1}_i \times \alpha\]
模式2 - 减法后应用 ReLU(subrelu):
\[\text{output}_i = \max(0, \text{input0}_i - \text{input1}_i)\]
模式3 - 减法后应用 ReLU6(subrelu6):
\[\text{output}_i = \min(\max(0, \text{input0}_i - \text{input1}_i), 6)\]
- 输入:
input0 - 第一个输入数据地址。
input1 - 第二个输入数据地址。
alpha - 缩放因子(仅 subext 模式需要),用于缩放 input1。
size - 计算长度,即参与运算的元素个数(对于复数类型,指复数的个数)。
core_mask - 核掩码(仅共享存储版本需要)。
- 输出:
output - 计算结果地址,其大小与输入相同。
- 支持平台:
FT78NE、MT7004
备注:
FT78NE 支持 fp32、int8、int16、int32、fp64、cplx64、cplx128
MT7004 支持 fp16、fp32、int16、int32、cplx64
共享存储版本:
subext(带缩放因子):
- void hp_subext_s(half *input0, half *input1, half alpha, half *output, int size, int core_mask)
- void fp_subext_s(float *input0, float *input1, float alpha, float *output, int size, int core_mask)
- void dp_subext_s(double *input0, double *input1, double alpha, double *output, int size, int core_mask)
- void c64_subext_s(float *input0, float *input1, float alpha, float *output, int size, int core_mask)
- void c128_subext_s(double *input0, double *input1, double alpha, double *output, int size, int core_mask)
subrelu(减法+ReLU):
- void i8_subrelu_s(int8_t *input0, int8_t *input1, int8_t *output, int size, int core_mask)
- void i16_subrelu_s(int16_t *input0, int16_t *input1, int16_t *output, int size, int core_mask)
- void i32_subrelu_s(int32_t *input0, int32_t *input1, int32_t *output, int size, int core_mask)
- void hp_subrelu_s(half *input0, half *input1, half *output, int size, int core_mask)
- void fp_subrelu_s(float *input0, float *input1, float *output, int size, int core_mask)
- void dp_subrelu_s(double *input0, double *input1, double *output, int size, int core_mask)
- void c64_subrelu_s(float *input0, float *input1, float *output, int size, int core_mask)
- void c128_subrelu_s(double *input0, double *input1, double *output, int size, int core_mask)
subrelu6(减法+ReLU6):
- void i8_subrelu6_s(int8_t *input0, int8_t *input1, int8_t *output, int size, int core_mask)
- void i16_subrelu6_s(int16_t *input0, int16_t *input1, int16_t *output, int size, int core_mask)
- void i32_subrelu6_s(int32_t *input0, int32_t *input1, int32_t *output, int size, int core_mask)
- void hp_subrelu6_s(half *input0, half *input1, half *output, int size, int core_mask)
- void fp_subrelu6_s(float *input0, float *input1, float *output, int size, int core_mask)
- void dp_subrelu6_s(double *input0, double *input1, double *output, int size, int core_mask)
- void c64_subrelu6_s(float *input0, float *input1, float *output, int size, int core_mask)
- void c128_subrelu6_s(double *input0, double *input1, double *output, int size, int core_mask)
C调用示例(subext):
//FT78NE示例
#include <stdio.h>
#include <subfusion.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0xA0000000; // 第一个输入在DDR空间
    float *input1 = (float *)0xA1000000; // 第二个输入在DDR空间
    float *output = (float *)0xB0000000; // output
    float alpha = 0.5f; // 缩放因子
    int size = 1000;
    int core_mask = 0xff;
    fp_subext_s(input0, input1, alpha, output, size, core_mask);
    return 0;
}
C调用示例(subrelu):
//FT78NE示例
#include <stdio.h>
#include <subfusion.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0xA0000000;
    float *input1 = (float *)0xA1000000;
    float *output = (float *)0xB0000000;
    int size = 1000;
    int core_mask = 0xff;
    fp_subrelu_s(input0, input1, output, size, core_mask);
    return 0;
}
C调用示例(subrelu6):
//FT78NE示例
#include <stdio.h>
#include <subfusion.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0xA0000000;
    float *input1 = (float *)0xA1000000;
    float *output = (float *)0xB0000000;
    int size = 1000;
    int core_mask = 0xff;
    fp_subrelu6_s(input0, input1, output, size, core_mask);
    return 0;
}
私有存储版本:
subext(带缩放因子):
- void hp_subext_p(half *input0, half *input1, half alpha, half *output, int size)
- void fp_subext_p(float *input0, float *input1, float alpha, float *output, int size)
- void dp_subext_p(double *input0, double *input1, double alpha, double *output, int size)
- void c64_subext_p(float *input0, float *input1, float alpha, float *output, int size)
- void c128_subext_p(double *input0, double *input1, double alpha, double *output, int size)
subrelu(减法+ReLU):
- void i8_subrelu_p(int8_t *input0, int8_t *input1, int8_t *output, int size)
- void i16_subrelu_p(int16_t *input0, int16_t *input1, int16_t *output, int size)
- void i32_subrelu_p(int32_t *input0, int32_t *input1, int32_t *output, int size)
- void hp_subrelu_p(half *input0, half *input1, half *output, int size)
- void fp_subrelu_p(float *input0, float *input1, float *output, int size)
- void dp_subrelu_p(double *input0, double *input1, double *output, int size)
- void c64_subrelu_p(float *input0, float *input1, float *output, int size)
- void c128_subrelu_p(double *input0, double *input1, double *output, int size)
subrelu6(减法+ReLU6):
- void i8_subrelu6_p(int8_t *input0, int8_t *input1, int8_t *output, int size)
- void i16_subrelu6_p(int16_t *input0, int16_t *input1, int16_t *output, int size)
- void i32_subrelu6_p(int32_t *input0, int32_t *input1, int32_t *output, int size)
- void hp_subrelu6_p(half *input0, half *input1, half *output, int size)
- void fp_subrelu6_p(float *input0, float *input1, float *output, int size)
- void dp_subrelu6_p(double *input0, double *input1, double *output, int size)
- void c64_subrelu6_p(float *input0, float *input1, float *output, int size)
- void c128_subrelu6_p(double *input0, double *input1, double *output, int size)
C调用示例(私有存储版本):
//FT78NE示例
#include <stdio.h>
#include <subfusion.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0x10000000; // 第一个输入在L2空间
    float *input1 = (float *)0x10001000; // 第二个输入在L2空间
    float *output = (float *)0x10002000; // output
    int size = 1000;
    fp_subrelu_p(input0, input1, output, size);
    return 0;
}